import re
import requests
import csv
# Scrape the "2021必看热片" list from the site's front page, visit every linked
# detail page, and write each film's title and download link to ./data1.csv.

# Base URL of the site; the hrefs collected below are joined onto it.
domain = 'https://www.dytt89.com/'

# Seconds to wait on the server before giving up. requests applies no timeout
# by default, so without this a stalled connection would hang the script.
REQUEST_TIMEOUT = 30

# Grabs the body of the first <ul> that follows the "2021必看热片" heading.
# re.S lets '.' match newlines so the match can span multiple lines.
obj1 = re.compile(r'2021必看热片.*?<ul>(?P<ul>.*?)</ul>', re.S)  # re.S: '.' also matches newlines
# Pulls the href out of every single-quoted <a> tag inside that list.
obj2 = re.compile(r'<a href=\'(?P<child_url>.*?)\'',re.S)
# On a detail page: the film title line, then the first download link found
# in the highlighted table cell after it.
obj3 = re.compile(r'◎片　　名\u3000(?P<name>.*?)<br />.*?'
                  r'<td style="WORD-WRAP: break-word" bgcolor="#fdfddf"><a href="(?P<download>.*?)"',re.S)

# NOTE(security): verify=False turns off TLS certificate verification, as in
# the original script. Kept as-is; remove it if the certificate can be trusted.
res = requests.get(domain, verify=False, timeout=REQUEST_TIMEOUT)
try:
    # Set the charset explicitly so the page text is decoded as gb2312.
    res.encoding = 'gb2312'
    content1 = res.text
finally:
    # Release the connection even if decoding fails.
    res.close()

# Absolute URLs of every detail page linked from the list. Only the leading
# slash is removed (to avoid "//" after the domain); a trailing slash is part
# of the path and is left alone.
child_url_list = [
    domain + link.group('child_url').lstrip('/')
    for section in obj1.finditer(content1)
    for link in obj2.finditer(section.group('ul'))
]

# newline='' is what the csv module requires for files handed to csv.writer;
# without it, extra blank rows appear on platforms that translate '\n'.
with open('./data1.csv', 'w', encoding='utf-8', newline='') as f:
    csvwrite = csv.writer(f)
    for url in child_url_list:
        res = requests.get(url, verify=False, timeout=REQUEST_TIMEOUT)
        try:
            res.encoding = 'gb2312'
            # One CSV row per match: (name, download), in group order.
            for match in obj3.finditer(res.text):
                csvwrite.writerow(match.groupdict().values())
        finally:
            res.close()